Location Analysis

We dont have IP linked with our Users. So we will link UserID with IP and then analyse the IPs to get some understanding on the Locations from where our users are mostly connected and analyse the locations for further improvement on user experience.

First we will connect to our database.


In [ ]:
from database import Database

database = Database(
    '<host name>',
    '<database name>',
    '<user name>',
    '<password>',
    'utf8mb4'
)
connection = database.connect_with_pymysql()

Then we create a relational schema between users and IPs in our database. Then we will insert data in the table.


In [ ]:
if connection:
    try:
        with connection.cursor() as cursor:
            create_schema_sql = "CREATE TABLE logs_user(user_id int(11) DEFAULT NULL, IP varchar(255) DEFAULT NULL) ENGINE=InnoDB DEFAULT CHARSET=utf8"
            cursor.execute(create_schema_sql)
            connection.commit()

            sql = "SELECT logs.ip, users.id FROM logs, users WHERE logs.email=users.email"
            cursor.execute(sql)
            data = cursor.fetchall()

            for record in data:
                insert = "INSERT INTO logs_user(user_id, IP) VALUES('"+str(record['id']) +"','"+str(record['ip'])+"')"
                cursor.execute(insert)
                connection.commit()
                
    finally:
        connection.close()

Now we will create another schema that will use a geo-location API to get the location from the IPs. We have a schema called ips and we will use the ip and forwarded coloumn to get distinct ips. To store the records we will create a location schema.


In [ ]:
if connection:
    try:
        with connection.cursor() as cursor:
            create_schema_sql = "CREATE TABLE location(IP varchar(20) NOT NULL DEFAULT '', longitude varchar(20) DEFAULT NULL, latitude varchar(20) DEFAULT NULL, country varchar(50) DEFAULT NULL, region varchar(70) DEFAULT NULL, city varchar(40) DEFAULT NULL, zip_code varchar(20) DEFAULT NULL, PRIMARY KEY (IP)) ENGINE=InnoDB DEFAULT CHARSET=utf8"
            cursor.execute(create_schema_sql)
            connection.commit()

            sql = "SELECT ip, forwarded FROM ips"
            cursor.execute(sql)
            data = cursor.fetchall()
            
            ip_set = set()
            
            for record in data:
                if record['ip']:
                    parsed_ip = record['ip'].split(',')
                    for i in parsed_ip:
                        ip_set.add(i.strip())
                if record['forwarded']:
                    parsed_ip = record['forwarded'].split(',')
                    for i in parsed_ip:
                        ip_set.add(i.strip())
            print len(ip_set)
    finally:
        connection.close()

In [ ]:
import urllib2
import json
try:
    with connection.cursor() as cursor:
        for i in ip_set:
            data = urllib2.urlopen('http://freegeoip.net/json/'+i)
            received_data = data.read()

            if received_data:
                data = json.loads(received_data)
                country = data['country_code'] + ': ' + data['country_name']
                region = (data['region_code'] + ': ' +data['region_name']).replace("'","")
                city = data['city'].replace("'","")
                latitude = data['latitude']
                longitude = data['longitude']
                zip_code = data['zip_code']
            try:
                insert_sql = "INSERT INTO location(zip_code, country, region, city, latitude, longitude, IP) VALUES('"+zip_code+"', '"+country+"', '"+region+"', '"+city+"', '"+str(latitude)+"', '"+str(longitude)+"', '"+i+"')"
                cursor.execute(insert_sql)
                connection.commit()
            except Exception:
                print i
finally:
    connection.close()